In [640]:
%matplotlib inline
import math
import time
import locale
from locale import atof
import warnings

import numpy as np
import pandas as pd
import scipy
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import xlrd

import sklearn
from sklearn import datasets, decomposition, ensemble, linear_model, metrics, neighbors, preprocessing
from sklearn.decomposition import PCA
from sklearn.decomposition import PCA as sklearn_pca
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE, RFECV, SelectFromModel, SelectKBest, f_regression
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, mean_squared_error
from sklearn.model_selection import (KFold, StratifiedKFold, cross_val_predict,
                                     cross_val_score, train_test_split)
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.utils import resample, shuffle

import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.sandbox.regression.predstd import wls_prediction_std

from IPython.display import display
#matplotlib.mlab.PCA was removed from matplotlib; sklearn's PCA (imported above) is used instead
#surprise's top-level evaluate() was removed in recent releases, so only SVD is imported
from surprise import SVD
In [641]:
# Read and import data
answers = pd.read_csv('WOW Raw Data HST n468.csv', encoding='latin-1')
answers.head()
Out[641]:
In [642]:
#Analyse types of variables
answers.info()
In [643]:
#Identify all the columns in the file
answers.columns
Out[643]:
In [644]:
#Identify unique values in the Section Area
answers.Section.unique()
Out[644]:
In [645]:
#Drop additional information that we are not using for the model
answers1 = answers.drop(['StartDate', 'EndDate', 'Status', 'IPAddress', 'Progress',
'Duration (in seconds)', 'Finished', 'RecordedDate', 'ResponseId',
'RecipientLastName', 'RecipientFirstName', 'RecipientEmail',
'ExternalReference', 'LocationLatitude', 'LocationLongitude',
'DistributionChannel', 'UserLanguage','Program','Type of Degree','City', 'Country',
'PostCode', 'Age', 'Birth', 'Nationality', 'Degree', 'Title',
'University', 'PrivateEmail', 'Address', 'Identification','University - Topics','Q1'],axis = 1)
In [646]:
#Rename columns using row 0 of the original data set
answers1 = answers1.rename(columns=answers.iloc[0])
#Drop the first three rows, which hold survey header/system information, and reset the index
answers1 = answers1.drop(answers.index[0:3]).reset_index(drop=True)
answers1.head()
Out[646]:
In [647]:
#Identify all the columns in the file
answers1.columns
Out[647]:
Subset of the data to look at scores by program, gender, etc.
In [648]:
answers2 = answers1[['Section','Gender','Catalyst_Score', 'Orderer_Score',
                     'Influencer_Score', 'Benefactor_Score', 'Harmonizer_Score',
                     'Investigator_Score', 'Quantifier_Score', 'Distiller_Score',
                     'Innovator_Score', 'Creator_Score']].copy()
answers2.head()
Out[648]:
In [649]:
#Assign values to Gender: Male = 0, Female = 1 (responses appear in English and Spanish)
answers2['Gender'] = answers2['Gender'].map({'Female': 1, 'Mujer': 1, 'Male': 0, 'Hombre': 0})
#Map sections to programs at a high level (e.g. GMBD & MBD -> MBD).
#Note: EMCC-01 is folded into MCC here; whether MCC and EMCC should stay
#separate is still an open question
section_map = {'MBD-01': 'MBD', 'MBD-02': 'MBD', 'GMBD-01': 'MBD',
               'MCC-01': 'MCC', 'MCC-02': 'MCC', 'EMCC-01': 'MCC',
               'MRCB-01': 'MRCB', 'MRCB-02': 'MRCB',
               'MVDM-01': 'MVDM', 'MVDM-02': 'MVDM',
               'MTDHR-01': 'MTDHR',
               'BIP-2014': 'BIP', 'BIP-2015': 'BIP', 'BIP-2016': 'BIP', 'BIP-2017': 'BIP',
               'BIS-2016': 'BIS', 'BIS-2017': 'BIS',
               'BIC-2014': 'BIC', 'BIC-2015': 'BIC', 'BIC-2016': 'BIC', 'BIC-2017': 'BIC',
               'MCXI-01': 'MCXI', 'MCS-01': 'MCS', 'EXMPLS-01': 'EXMPLS'}
answers2['Section'] = answers2['Section'].map(section_map)
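A small sanity check, not in the original notebook, that the map above covered every raw section code (anything missed by .map() becomes NaN):
In [ ]:
#Added check: count section codes the mapping missed
print(answers2['Section'].isna().sum())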
In [650]:
answers2.head()
Out[650]:
In [651]:
#New names for Programs
answers2.Section.unique()
Out[651]:
In [652]:
answers2.info()
In [653]:
answers2['Gender'].value_counts()
Out[653]:
In [654]:
answers2['Section'].value_counts()
Out[654]:
Relationship between variables - Raw
In [655]:
#Column names below follow the source file, including its inconsistent
#'Distiler'/'Distiller' spelling
ratings = answers1[['Section','Gender','Catalyst-Driving', 'Catalyst-Orchestrating', 'Catalyst-Activating',
       'Orderer-Ordering', 'Orderer-Risk Reducing', 'Orderer-Policing',
       'Influencer-Communicating', 'Influencer-Advocating',
       'Influencer-Selling', 'Benefactor-Defending', 'Benefactor-Empathizing',
       'Benefactor-Developing', 'Harmonizer-Including',
       'Harmonizer-Conflict Reducing', 'Harmonizer-Consensus Building',
       'Investigator-Drilling', 'Investigator-Dissecting',
       'Investigator-Explaining', 'Quantifier-Measuring',
       'Quantifier-Pattern Finding', 'Quantifier-Modeling',
       'Distiler-Packaging', 'Distiler-Simplifying', 'Distiller-Connecting',
       'Innovator-Disrupting', 'Innovator-Brainstorming', 'Innovator-Testing',
       'Creator-Creating', 'Creator-Making', 'Creator-Expressing']].copy()
#Map sections to programs at a high level, reusing section_map from above
#(EMCC-01 again folds into MCC)
ratings['Section'] = ratings['Section'].map(section_map)
In [656]:
#ratings = ratings[ratings.Section != 'BIC']
#ratings = ratings[ratings.Section != 'BIS']
#ratings = ratings[ratings.Section != 'BIP']
ratings1 = ratings.drop(['Gender','Section'], axis = 1)
print(ratings.Section.unique())
ratings1.head()
Out[656]:
In [657]:
from sklearn.preprocessing import scale
#Ensure numeric dtype, then standardize each respondent's answers across the
#questions (mean 0, std 1 per row) so that profiles are comparable
ratings1 = ratings1.apply(pd.to_numeric)
ratings1 = ratings1.apply(lambda V: scale(V, axis=0, with_mean=True, with_std=True, copy=False), axis=1)
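A quick verification sketch, not in the original notebook: after the row-wise scaling, each respondent's ratings should have mean ~0 and standard deviation ~1 (ddof=0 matches sklearn's scale).
In [ ]:
#Added check: row means should be ~0 and population stds ~1
print(ratings1.mean(axis=1).abs().max())
print(ratings1.std(axis=1, ddof=0).head())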
In [658]:
#Correlation Matrix between all answers
corrmat = ratings1.corr(method='spearman')
#plot pivot table as heatmap using seaborn
plt.figure(figsize=(30, 10))
ax = sns.heatmap(corrmat, annot=True)
plt.tight_layout()
plt.show()
In [659]:
ratings2 = ratings['Section']
In [660]:
ratings3 = pd.concat([ratings2, ratings1], axis=1)
ratings3.head()
Out[660]:
In [661]:
ratings3.Section.unique()
Out[661]:
In [662]:
means = ratings3.groupby(['Section']).mean()
In [663]:
#plot pivot table as heatmap using seaborn
plt.figure(figsize=(30, 10))
ax = sns.heatmap(means, annot=True)
plt.show()
In [664]:
stdmeans = pd.DataFrame(means.std())
stdmeans
Out[664]:
In [665]:
#Normalize Gender labels to English: 'Mujer' -> 'Female', 'Hombre' -> 'Male'
ratings['Gender'] = ratings['Gender'].map({'Female': 'Female', 'Mujer': 'Female', 'Male': 'Male', 'Hombre': 'Male'})
ratings5 = ratings['Gender']
ratings4 = pd.concat([ratings5, ratings3], axis=1)
ratings4.head()
Out[665]:
In [666]:
gendermean = ratings4.groupby(['Section','Gender']).mean()
In [667]:
#plot pivot table as heatmap using seaborn
plt.figure(figsize=(30, 10))
ax = sns.heatmap(gendermean, annot=True, square=True)
plt.setp(ax.xaxis.get_majorticklabels(), rotation=90 )
plt.show()
In [668]:
genderstd = ratings4.groupby(['Section','Gender']).std()
genderstd
Out[668]:
In [669]:
#plot pivot table as heatmap using seaborn
plt.figure(figsize=(30, 10))
ax = sns.heatmap(genderstd, annot=True, square=True)
plt.setp(ax.xaxis.get_majorticklabels(), rotation=90 )
plt.show()
In [670]:
ratings3['Section'].unique()
Out[670]:
In [671]:
#Encode programs as integer labels (the codes are arbitrary; 7 is unused in the original)
ratings3['Section'] = ratings3['Section'].map({'MCC': 0, 'MBD': 1, 'MVDM': 2, 'MRCB': 3, 'MTDHR': 4, 'BIP': 5, 'MCXI': 6, 'MCS': 8, 'BIC': 9, 'EXMPLS': 10, 'BIS': 11})
In [672]:
ratings3['Section'].unique()
Out[672]:
In [673]:
#Decision tree: which question gives us the most information?
#Convert the data so that we can run a tree
X1 = ratings3.drop(['Section'],axis=1)
Y1 = ratings3.Section.values
# This is the model we'll be using.
from sklearn import tree
# A convenience for displaying visualizations.
from IPython.display import Image
# Packages for rendering our tree.
import pydotplus
import graphviz
# Initialize and train our tree.
decision_tree = tree.DecisionTreeClassifier(
    criterion='gini',
    max_features=2,
    random_state=10
)
decision_tree.fit(X1, Y1)
# Render the tree; class_names must follow the order of decision_tree.classes_
dot_data = tree.export_graphviz(
    decision_tree, out_file=None,
    feature_names=X1.columns,
    class_names=[str(c) for c in decision_tree.classes_],
    filled=True
)
graph = pydotplus.graph_from_dot_data(dot_data)
Image(graph.create_png())
In [674]:
# Convert data for second feature selection method
X = np.array(ratings3.drop(['Section'],axis=1))
Y = np.array(ratings3['Section'])
In [675]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from itertools import product
In [676]:
clf1 = DecisionTreeClassifier(max_depth=4)
clf2 = KNeighborsClassifier(n_neighbors=7)
clf3 = SVC(kernel='rbf', probability=True)
eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2),
('svc', clf3)],
voting='soft', weights=[2, 1, 2])
clf1.fit(X, Y)
clf2.fit(X, Y)
clf3.fit(X, Y)
eclf.fit(X, Y)
Out[676]:
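A hedged evaluation sketch, not in the original: cross-validated accuracy for each component and the soft-voting ensemble. cv=5 is an assumption, and programs with very few respondents may need fewer folds.
In [ ]:
#Added sketch: compare cross-validated accuracy (cv=5 is an assumption)
for label, clf in [('dt', clf1), ('knn', clf2), ('svc', clf3), ('ensemble', eclf)]:
    scores = cross_val_score(clf, X, Y, cv=5)
    print('%s: %.3f (+/- %.3f)' % (label, scores.mean(), scores.std()))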
Potential order of questions (based on which features best describe the overall model)
EXMPLS and the Bachelor programs (BIP, BIC, BIS) drop out in the first questions
['MCC', 'MBD', 'MVDM', 'MRCB', 'MTDHR', 'MCXI', 'MCS']
In [677]:
#Filter out the Bachelor programs and EXMPLS step by step
#(ratings8 is reused by the per-program cells below)
ratings6 = ratings4[ratings4.Section != 'BIP']
ratings7 = ratings6[ratings6.Section != 'EXMPLS']
ratings8 = ratings7[ratings7.Section != 'BIC']
ratings9 = ratings8[ratings8.Section != 'BIS']
ratings10 = ratings9.copy()
ratings10.Section.unique()
Out[677]:
In [678]:
#Feature selection: score the most relevant features (should we start with the
#question that has the most explanatory power?)
from sklearn.feature_selection import SelectKBest
#Note: X10 and Y10 are defined in the 'Data prepared for modelling' cell at the
#end of this notebook, so run that cell before this one
# feature extraction (SelectKBest defaults to the ANOVA F-test, f_classif)
test = SelectKBest()
fit = test.fit(X10, Y10)
#Identify the features with the highest scores for the remaining programs
names2 = X10.columns
Bestfeatures = pd.DataFrame(fit.scores_, index=names2)
Bestfeatures.columns = ['Potential Order of Questions']
Bestfeatures.sort_values(by=['Potential Order of Questions'], ascending=False)
Out[678]:
MCC
In [679]:
#One-vs-rest target: MCC = 1, all other programs = 0
ratings10['Section'] = ratings10['Section'].map({'MCC': 1, 'MBD': 0, 'MVDM': 0, 'MRCB': 0, 'MTDHR': 0, 'MCXI': 0, 'MCS': 0})
ratings10.Section.unique()
Out[679]:
In [680]:
#Upsample the minority class
# Separate majority and minority classes
ratings10_majority = ratings10[ratings10.Section==0]
ratings10_minority = ratings10[ratings10.Section==1]
# Upsample minority class to match the majority class size
ratings10_minority_upsampled = resample(ratings10_minority, replace=True, n_samples=len(ratings10_majority), random_state=123)
# Combine majority class with upsampled minority class
ratings10_upsampled = pd.concat([ratings10_majority, ratings10_minority_upsampled])
# Display new class counts
ratings10_upsampled.Section.value_counts()
Out[680]:
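One caution on the upsampling used here and in the blocks below: duplicating minority rows before any train/test split lets copies of the same respondent appear on both sides of a split, inflating scores. A hedged sketch of the safer order (split first, then upsample only the training part; test_size=0.3 is an assumption):
In [ ]:
#Added sketch: split first, then upsample only the training fold
train, test = train_test_split(ratings10, test_size=0.3, random_state=123,
                               stratify=ratings10.Section)
train_minority_up = resample(train[train.Section==1], replace=True,
                             n_samples=(train.Section==0).sum(), random_state=123)
train_upsampled = pd.concat([train[train.Section==0], train_minority_up])
train_upsampled.Section.value_counts()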
In [681]:
#Decision tree: which question gives us the most information?
#Convert the data so that we can run a tree
X2 = ratings10_upsampled.drop(['Section','Gender'],axis=1)
Y2 = ratings10_upsampled.Section.values
In [682]:
clf1 = DecisionTreeClassifier(max_depth=4)
clf2 = KNeighborsClassifier(n_neighbors=7)
clf3 = SVC(kernel='rbf', probability=True)
eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2),
('svc', clf3)],
voting='soft', weights=[1, 1, 1])
clf4 = GaussianNB()  # GaussianNB is imported at the top; clf4 is defined but not used below
In [683]:
# params would configure a 500-round, depth-2 gradient boosting classifier, but
# that call is commented out; the model actually fitted is clf1, the depth-4
# decision tree defined above
params = {'n_estimators': 500,
          'max_depth': 2,
          'loss': 'deviance'}
#clf1 = ensemble.GradientBoostingClassifier(**params)
clf1.fit(X2, Y2)
feature_importance2 = clf1.feature_importances_
# Make importances relative to max importance.
feature_importance2 = 100.0 * (feature_importance2 / feature_importance2.max())
sorted_idx = np.argsort(feature_importance2)
pos = np.arange(sorted_idx.shape[0]) + .5
plt.figure(figsize=(7, 30))
plt.subplot(1, 1, 1)
plt.barh(pos, feature_importance2[sorted_idx], align='center')
plt.yticks(pos, X2.columns[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('MCC')
plt.show()
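The six program blocks that follow repeat this map/upsample/fit/plot pattern with different targets. A refactoring sketch, not in the original, that wraps the fit-and-plot steps in one helper:
In [ ]:
#Added helper sketch: fit a classifier and plot its relative feature importances
def plot_feature_importance(clf, X, y, title):
    clf.fit(X, y)
    imp = 100.0 * clf.feature_importances_ / clf.feature_importances_.max()
    order = np.argsort(imp)
    pos = np.arange(len(order)) + .5
    plt.figure(figsize=(7, 30))
    plt.barh(pos, imp[order], align='center')
    plt.yticks(pos, X.columns[order])
    plt.xlabel('Relative Importance')
    plt.title(title)
    plt.show()
    return imp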
MBD
In [684]:
ratings8.Section.unique()
Out[684]:
In [685]:
ratings11 = ratings8[ratings8.Section != 'BIS'].copy()
#One-vs-rest target: MBD = 1, all other programs = 0
ratings11['Section'] = ratings11['Section'].map({'MCC': 0, 'MBD': 1, 'MVDM': 0, 'MRCB': 0, 'MTDHR': 0, 'MCXI': 0, 'MCS': 0})
ratings11.Section.value_counts()
Out[685]:
In [686]:
#Upsample the minority class
# Separate majority and minority classes
ratings11_majority = ratings11[ratings11.Section==0]
ratings11_minority = ratings11[ratings11.Section==1]
# Upsample minority class to match the majority class size
ratings11_minority_upsampled = resample(ratings11_minority, replace=True, n_samples=len(ratings11_majority), random_state=123)
# Combine majority class with upsampled minority class
ratings11_upsampled = pd.concat([ratings11_majority, ratings11_minority_upsampled])
# Display new class counts
ratings11_upsampled.Section.value_counts()
Out[686]:
In [687]:
#Decision tree: which question gives us the most information?
#Convert the data so that we can run a tree
X3 = ratings11_upsampled.drop(['Section','Gender'],axis=1)
Y3 = ratings11_upsampled.Section.values
In [688]:
# params would configure a 500-round, depth-2 gradient boosting classifier, but
# that call is commented out; the model actually fitted is clf1, the depth-4
# decision tree defined earlier
params = {'n_estimators': 500,
          'max_depth': 2,
          'loss': 'deviance'}
#clf1 = ensemble.GradientBoostingClassifier(**params)
clf1.fit(X3, Y3)
feature_importance3 = clf1.feature_importances_
# Make importances relative to max importance.
feature_importance3 = 100.0 * (feature_importance3 / feature_importance3.max())
sorted_idx = np.argsort(feature_importance3)
pos = np.arange(sorted_idx.shape[0]) + .5
plt.figure(figsize=(7, 30))
plt.subplot(1, 1, 1)
plt.barh(pos, feature_importance3[sorted_idx], align='center')
plt.yticks(pos, X3.columns[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('MBD')
plt.show()
MVDM
In [689]:
ratings8.Section.unique()
Out[689]:
In [690]:
ratings12 = ratings8[ratings8.Section != 'BIS'].copy()
#One-vs-rest target: MVDM = 1, all other programs = 0
ratings12['Section'] = ratings12['Section'].map({'MCC': 0, 'MBD': 0, 'MVDM': 1, 'MRCB': 0, 'MTDHR': 0, 'MCXI': 0, 'MCS': 0})
ratings12.Section.value_counts()
Out[690]:
In [691]:
#Upsample the minority class
# Separate majority and minority classes
ratings12_majority = ratings12[ratings12.Section==0]
ratings12_minority = ratings12[ratings12.Section==1]
# Upsample minority class to match the majority class size
ratings12_minority_upsampled = resample(ratings12_minority, replace=True, n_samples=len(ratings12_majority), random_state=123)
# Combine majority class with upsampled minority class
ratings12_upsampled = pd.concat([ratings12_majority, ratings12_minority_upsampled])
# Display new class counts
ratings12_upsampled.Section.value_counts()
Out[691]:
In [692]:
#Decision tree: which question gives us the most information?
#Convert the data so that we can run a tree
X4 = ratings12_upsampled.drop(['Section','Gender'],axis=1)
Y4 = ratings12_upsampled.Section.values
In [693]:
# params would configure a 500-round, depth-2 gradient boosting classifier, but
# that call is commented out; the model actually fitted is clf1, the depth-4
# decision tree defined earlier
params = {'n_estimators': 500,
          'max_depth': 2,
          'loss': 'deviance'}
#clf1 = ensemble.GradientBoostingClassifier(**params)
clf1.fit(X4, Y4)
feature_importance4 = clf1.feature_importances_
# Make importances relative to max importance.
feature_importance4 = 100.0 * (feature_importance4/ feature_importance4.max())
sorted_idx = np.argsort(feature_importance4)
pos = np.arange(sorted_idx.shape[0]) + .5
plt.figure(figsize=(7, 30))
plt.subplot(1, 1, 1)
plt.barh(pos, feature_importance4[sorted_idx], align='center')
plt.yticks(pos, X4.columns[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('MVDM')
plt.show()
MRCB
In [694]:
ratings8.Section.unique()
Out[694]:
In [695]:
ratings13 = ratings8[ratings8.Section != 'BIS'].copy()
#One-vs-rest target: MRCB = 1, all other programs = 0
ratings13['Section'] = ratings13['Section'].map({'MCC': 0, 'MBD': 0, 'MVDM': 0, 'MRCB': 1, 'MTDHR': 0, 'MCXI': 0, 'MCS': 0})
ratings13.Section.value_counts()
Out[695]:
In [696]:
#Upsample the minority class
# Separate majority and minority classes
ratings13_majority = ratings13[ratings13.Section==0]
ratings13_minority = ratings13[ratings13.Section==1]
# Upsample minority class to match the majority class size
ratings13_minority_upsampled = resample(ratings13_minority, replace=True, n_samples=len(ratings13_majority), random_state=123)
# Combine majority class with upsampled minority class
ratings13_upsampled = pd.concat([ratings13_majority, ratings13_minority_upsampled])
# Display new class counts
ratings13_upsampled.Section.value_counts()
Out[696]:
In [697]:
#Decision tree: which question gives us the most information?
#Convert the data so that we can run a tree
X5 = ratings13_upsampled.drop(['Section','Gender'],axis=1)
Y5 = ratings13_upsampled.Section.values
In [698]:
# params would configure a 500-round, depth-2 gradient boosting classifier, but
# that call is commented out; the model actually fitted is clf1, the depth-4
# decision tree defined earlier
params = {'n_estimators': 500,
          'max_depth': 2,
          'loss': 'deviance'}
#clf1 = ensemble.GradientBoostingClassifier(**params)
clf1.fit(X5, Y5)
feature_importance5 = clf1.feature_importances_
# Make importances relative to max importance.
feature_importance5 = 100.0 * (feature_importance5 / feature_importance5.max())
sorted_idx = np.argsort(feature_importance5)
pos = np.arange(sorted_idx.shape[0]) + .5
plt.figure(figsize=(7, 30))
plt.subplot(1, 1, 1)
plt.barh(pos, feature_importance5[sorted_idx], align='center')
plt.yticks(pos, X5.columns[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('MRCB')
plt.show()
MTDHR
In [699]:
ratings8.Section.unique()
Out[699]:
In [700]:
ratings14 = ratings8[ratings8.Section != 'BIS'].copy()
#One-vs-rest target: MTDHR = 1, all other programs = 0
ratings14['Section'] = ratings14['Section'].map({'MCC': 0, 'MBD': 0, 'MVDM': 0, 'MRCB': 0, 'MTDHR': 1, 'MCXI': 0, 'MCS': 0})
ratings14.Section.value_counts()
Out[700]:
In [701]:
#Upsample the minority class
# Separate majority and minority classes
ratings14_majority = ratings14[ratings14.Section==0]
ratings14_minority = ratings14[ratings14.Section==1]
# Upsample minority class to match the majority class size
ratings14_minority_upsampled = resample(ratings14_minority, replace=True, n_samples=len(ratings14_majority), random_state=123)
# Combine majority class with upsampled minority class
ratings14_upsampled = pd.concat([ratings14_majority, ratings14_minority_upsampled])
# Display new class counts
ratings14_upsampled.Section.value_counts()
Out[701]:
In [702]:
#Decision tree: which question gives us the most information?
#Convert the data so that we can run a tree
X6 = ratings14_upsampled.drop(['Section','Gender'],axis=1)
Y6 = ratings14_upsampled.Section.values
In [703]:
# params would configure a 500-round, depth-2 gradient boosting classifier, but
# that call is commented out; the model actually fitted is clf1, the depth-4
# decision tree defined earlier
params = {'n_estimators': 500,
          'max_depth': 2,
          'loss': 'deviance'}
#clf1 = ensemble.GradientBoostingClassifier(**params)
clf1.fit(X6, Y6)
feature_importance6 = clf1.feature_importances_
# Make importances relative to max importance.
feature_importance6 = 100.0 * (feature_importance6 / feature_importance6.max())
sorted_idx = np.argsort(feature_importance6)
pos = np.arange(sorted_idx.shape[0]) + .5
plt.figure(figsize=(7, 30))
plt.subplot(1, 1, 1)
plt.barh(pos, feature_importance6[sorted_idx], align='center')
plt.yticks(pos, X6.columns[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('MTDHR')
plt.show()
MCXI
In [704]:
ratings8.Section.unique()
Out[704]:
In [705]:
ratings15 = ratings8[ratings8.Section != 'BIS'].copy()
#One-vs-rest target: MCXI = 1, all other programs = 0
ratings15['Section'] = ratings15['Section'].map({'MCC': 0, 'MBD': 0, 'MVDM': 0, 'MRCB': 0, 'MTDHR': 0, 'MCXI': 1, 'MCS': 0})
ratings15.Section.value_counts()
Out[705]:
In [706]:
#Upsample the minority class
# Separate majority and minority classes
ratings15_majority = ratings15[ratings15.Section==0]
ratings15_minority = ratings15[ratings15.Section==1]
# Upsample minority class to match the majority class size
ratings15_minority_upsampled = resample(ratings15_minority, replace=True, n_samples=len(ratings15_majority), random_state=123)
# Combine majority class with upsampled minority class
ratings15_upsampled = pd.concat([ratings15_majority, ratings15_minority_upsampled])
# Display new class counts
ratings15_upsampled.Section.value_counts()
Out[706]:
In [707]:
#Decision tree: which question gives us the most information?
#Convert the data so that we can run a tree
X7 = ratings15_upsampled.drop(['Section','Gender'],axis=1)
Y7 = ratings15_upsampled.Section.values
In [708]:
# params would configure a 500-round, depth-2 gradient boosting classifier, but
# that call is commented out; the model actually fitted is clf1, the depth-4
# decision tree defined earlier
params = {'n_estimators': 500,
          'max_depth': 2,
          'loss': 'deviance'}
#clf1 = ensemble.GradientBoostingClassifier(**params)
clf1.fit(X7, Y7)
feature_importance7 = clf1.feature_importances_
# Make importances relative to max importance.
feature_importance7 = 100.0 * (feature_importance7 / feature_importance7.max())
sorted_idx = np.argsort(feature_importance7)
pos = np.arange(sorted_idx.shape[0]) + .5
plt.figure(figsize=(7, 30))
plt.subplot(1, 1, 1)
plt.barh(pos, feature_importance7[sorted_idx], align='center')
plt.yticks(pos, X7.columns[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('MCXI')
plt.show()
MCS
In [709]:
ratings8.Section.unique()
Out[709]:
In [710]:
ratings16 = ratings8[ratings8.Section != 'BIS'].copy()
#One-vs-rest target: MCS = 1, all other programs = 0
ratings16['Section'] = ratings16['Section'].map({'MCC': 0, 'MBD': 0, 'MVDM': 0, 'MRCB': 0, 'MTDHR': 0, 'MCXI': 0, 'MCS': 1})
ratings16.Section.value_counts()
Out[710]:
In [711]:
#Upsample the minority class
# Separate majority and minority classes
ratings16_majority = ratings16[ratings16.Section==0]
ratings16_minority = ratings16[ratings16.Section==1]
# Upsample minority class to match the majority class size
ratings16_minority_upsampled = resample(ratings16_minority, replace=True, n_samples=len(ratings16_majority), random_state=123)
# Combine majority class with upsampled minority class
ratings16_upsampled = pd.concat([ratings16_majority, ratings16_minority_upsampled])
# Display new class counts
ratings16_upsampled.Section.value_counts()
Out[711]:
In [712]:
#Decision tree: which question gives us the most information?
#Convert the data so that we can run a tree
X8 = ratings16_upsampled.drop(['Section','Gender'],axis=1)
Y8 = ratings16_upsampled.Section.values
In [713]:
# params would configure a 500-round, depth-2 gradient boosting classifier, but
# that call is commented out; the model actually fitted is clf1, the depth-4
# decision tree defined earlier
params = {'n_estimators': 500,
          'max_depth': 2,
          'loss': 'deviance'}
#clf1 = ensemble.GradientBoostingClassifier(**params)
clf1.fit(X8, Y8)
feature_importance8 = clf1.feature_importances_
# Make importances relative to max importance.
feature_importance8 = 100.0 * (feature_importance8 / feature_importance8.max())
sorted_idx = np.argsort(feature_importance8)
pos = np.arange(sorted_idx.shape[0]) + .5
plt.figure(figsize=(7, 30))
plt.subplot(1, 1, 1)
plt.barh(pos, feature_importance8[sorted_idx], align='center')
plt.yticks(pos, X8.columns[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('MCS')
plt.show()
In [714]:
# View a list of the features and their importance scores
mcc = list(zip(X2.columns, feature_importance2))
mbd = list(zip(X3.columns, feature_importance3))
mvdm = list(zip(X4.columns, feature_importance4))
mrcb = list(zip(X5.columns, feature_importance5))
mtdhr = list(zip(X6.columns, feature_importance6))
mcxi = list(zip(X7.columns, feature_importance7))
mcs = list(zip(X8.columns, feature_importance8))
In [715]:
#Feature Importance for each program
names=['mcc','mbd', 'mvdm','mrcb','mtdhr','mcxi','mcs']
names2 = X2.columns
programfeatures = pd.DataFrame(list(zip(feature_importance2, feature_importance3, feature_importance4, feature_importance5,
feature_importance6, feature_importance7, feature_importance8)),columns = names, index=names2)
programfeatures = programfeatures.round(2)
Summary of relative importance of features to describe each program
In [716]:
#Summary: relative importance of the features for describing each program
programfeatures
Out[716]:
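A small usage example, not in the original: rank the questions for one program, e.g. the five most informative for MCC:
In [ ]:
#Added usage example: top five questions for MCC by relative importance
programfeatures.sort_values(by='mcc', ascending=False).head(5)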
Data prepared for modelling
In [717]:
ratings8.Section.unique()
Out[717]:
In [718]:
ratings17 = ratings8[ratings8.Section != 'BIS'].copy()
#Multiclass target over the seven remaining master's programs
ratings17['Section'] = ratings17['Section'].map({'MCC': 0, 'MBD': 1, 'MVDM': 2, 'MRCB': 3, 'MTDHR': 4, 'MCXI': 5, 'MCS': 6})
#X10/Y10 also feed the SelectKBest cell earlier in the notebook, so run this cell first
X10 = ratings17.drop(['Section','Gender'],axis=1)
Y10 = ratings17.Section.values
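With X10 and Y10 prepared, a minimal next-step sketch (assumed, not part of the original notebook): a cross-validated random forest over the seven master's programs. The hyperparameters here are illustrative, not tuned choices.
In [ ]:
#Added sketch: baseline multiclass model on the prepared data
#n_estimators and cv are assumptions, not values from the original notebook
rfc = RandomForestClassifier(n_estimators=100, random_state=123)
print(cross_val_score(rfc, X10, Y10, cv=5))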
In [ ]: